////////////////////////////////////////////////////////////////////////////////
//////// LEADERS DATASET
////////////////////////////////////////////////////////////////////////////////

// This do-file creates a database of leaders with the dates they enter/exit power.

////////////////////////////////////////////////////////////////////////////////
//////// a. Extracting data from V-Dem
////////////////////////////////////////////////////////////////////////////////

// Load only the V-Dem country-date variables needed here: the country, the
// year, the exact date, the names of the HOS and HOG, and the HOS==HOG flag.
use country_name year historical_date v2exnamhos v2exnamhog v2exhoshog ///
	using "$project_path/data/1_input/elections/V-Dem/vdem_cd_extract.dta", clear

sort country_name historical_date

// Shorter working names for the rest of the file.
rename (country_name historical_date v2exnamhos v2exnamhog v2exhoshog) ///
       (country date hos hog hoshog)

// When hoshog==1 the HOS and HOG are the same person, but V-Dem sometimes
// fills in only one of the two names: copy the available name to the other.
// (The two replaces act on disjoint rows, so their order does not matter.)
replace hos = hog if hoshog==1 & hos==""
replace hog = hos if hoshog==1 & hog==""

// Keep only dates for which at least one of the two names is known.
keep if hos!="" | hog!=""
drop hoshog
format country hos hog %20s

// We separate HOS and HOG: every country-date row is duplicated so that one
// copy carries the HOS name and the other carries the HOG name.
// expand's generate() option marks the original row (0) and its copy (1).
// This labels the two copies without assuming that country-date is unique;
// labelling on _n within country-date would leave any third or later row of
// a group with an empty leader_type, and that row would then be silently
// dropped below.
expand 2, generate(hog_copy)
gen leader = ""
gen leader_type = ""
replace leader_type = "HOS" if hog_copy==0
replace leader_type = "HOG" if hog_copy==1
replace leader = hos if leader_type=="HOS"
replace leader = hog if leader_type=="HOG"
drop hos hog hog_copy
// Rows whose office holder is unknown carry no information.
drop if leader==""
format leader %20s

// Cleaning country names: V-Dem names are replaced by the labels used in the
// rest of this file (presumably the labels used by the other sources that
// are appended later -- TODO confirm).
replace country = "Bosnia-Herz"        if country == "Bosnia and Herzegovina"
replace country = "Myanmar"            if country == "Burma/Myanmar"
replace country = "C. Verde Is."       if country == "Cape Verde"
replace country = "Cent. Af. Rep."     if country == "Central African Republic"
replace country = "PRC"                if country == "China"
replace country = "Comoro Is."         if country == "Comoros"
replace country = "Czech Rep."         if country == "Czech Republic"
replace country = "Congo (DRC)"        if country == "Democratic Republic of the Congo"
replace country = "Dom. Rep."          if country == "Dominican Republic"
replace country = "Eq. Guinea"         if country == "Equatorial Guinea"
replace country = "Swaziland"          if country == "Eswatini"
replace country = "GDR"                if country == "German Democratic Republic"
replace country = "FRG/Germany"        if country == "Germany"
replace country = "PRK"                if country == "North Korea"
replace country = "Macedonia"          if country == "North Macedonia"
replace country = "P. N. Guinea"       if country == "Papua New Guinea"
replace country = "Congo"              if country == "Republic of the Congo"
replace country = "Solomon Is."        if country == "Solomon Islands"
replace country = "S. Africa"          if country == "South Africa"
replace country = "Korea, Republic of" if country == "South Korea"
replace country = "Gambia"             if country == "The Gambia"
replace country = "Trinidad-Tobago"    if country == "Trinidad and Tobago"
replace country = "UAE"                if country == "United Arab Emirates"
replace country = "UK"                 if country == "United Kingdom"
replace country = "USA"                if country == "United States of America"

// Palestine: the British Mandate series is kept except for 1948, the West
// Bank series is kept from 1949 onwards, and Gaza is dropped entirely. The
// two remaining series are then merged under a single label. The drops must
// run before the relabelling, which erases the distinction between them.
drop if country == "Palestine/British Mandate" & year == 1948
drop if country == "Palestine/Gaza"
drop if country == "Palestine/West Bank" & year <= 1948
foreach series in "Palestine/British Mandate" "Palestine/West Bank" {
	replace country = "Occupied Palestinian Territories" if country == "`series'"
}

// Entities removed from the dataset altogether (all years).
// "Republic of Vietnam" is dropped for every year, which also covers the
// year>1944 subset that used to be dropped in a separate statement.
foreach entity in ///
	"Baden" "Bavaria" "Brunswick" "Hamburg" "Hanover" ///
	"Hesse-Darmstadt" "Hesse-Kassel" "Hong Kong" "Mecklenburg Schwerin" ///
	"Modena" "Nassau" "Oldenburg" "Papal States" "Parma" ///
	"Piedmont-Sardinia" "Republic of Vietnam" "Saxe-Weimar-Eisenach" ///
	"Saxony" "Tuscany" "Two Sicilies" "Würtemberg" "Zanzibar" {
	drop if country == "`entity'"
}

// When the first leadership change of the year does not take place on Jan 1,
// we create an observation for Jan 1 with the leader in place on Dec 31 of
// the year before. This way, we have a list of leaders for each year.
order country leader_type year date leader
sort country leader_type year date

// Mark every Dec 31 observation that is not immediately followed by a Jan 1
// observation of the same country / leader type. For the last observation of
// a group date[_n+1] is missing, so the condition also holds there.
// (date) makes the within-group order explicit so that [_n+1] is always the
// chronologically next observation.
gen to_expand = 1
bysort country leader_type (date) : replace to_expand = 2 if month(date)==12 & day(date)==31 & date[_n+1]-date!=1

// Duplicate the marked observations and shift the copy by one day, i.e. to
// Jan 1 of the following year (the copy is the 2nd row of its date group).
expand to_expand
bysort country leader_type year date : replace date = date + _n - 1
replace year = year(date)
drop to_expand

// Within each year, keep an observation only if the leader differs from the
// chronologically previous one. expand appends rows at the end of the data
// and date/year were modified above, so the data are no longer sorted by
// date: without (date) the comparison with leader[_n-1] would be made in an
// arbitrary within-year order and the result would not be reproducible.
bysort country leader_type year (date) : drop if leader==leader[_n-1]

// We reshape the data to one row per country / leader type / year, with the
// successive leaders of the year in leader_1, leader_2, ... and the date
// attached to each of them in leader_start_1, leader_start_2, ...

// Number the leaders of each year in chronological order.
bysort country leader_type year (date) : gen leader_no = _n

// Add the trailing underscore expected as the stub by reshape.
order country leader_type year leader date
rename (leader date) (leader_ leader_start_)
reshape wide leader_ leader_start_, i(country leader_type year) j(leader_no)

// Tag the origin of these rows before they are pooled with other sources.
gen source = "V-Dem"

tempfile leaders_vdem
save `leaders_vdem'

////////////////////////////////////////////////////////////////////////////////
//////// b. Adding data from other sources and selecting a source
////////////////////////////////////////////////////////////////////////////////

// Pool the leader-year files of all sources, V-Dem (built above) included.
use "$project_path/data/1_input/other/leaders/leaders_wikipedia.dta", clear
append using "$project_path/data/1_input/other/leaders/leaders_wikidata.dta"
append using "$project_path/data/1_input/other/leaders/leaders_nohlen.dta"
append using `leaders_vdem'

format country %20s
forvalues j = 1/10 {
	format leader_`j' %20s
}
format leader_type %10s

// We use the following priority order for sources:
// V-Dem > Wikipedia > Wikidata > Nohlen (1 = most preferred).
gen source_priority = .
local rank = 0
foreach src in "V-Dem" "Wikipedia" "Wikidata" "Nohlen" {
	local ++rank
	replace source_priority = `rank' if source == "`src'"
}

// Finding best source: the most preferred source available for each
// country / year / leader type.
bysort country year leader_type : egen best_source = min(source_priority)

// Adjusting manually the best source in some cases
// V-Dem has data on Yugoslavia under the name 'Serbia'
replace best_source = 2 if country == "Montenegro" & year == 1998
replace best_source = 2 if country == "Serbia" & year >= 1991 & year <= 2003
// V-Dem considers the Supreme Commander for the Allied Powers as the HOG in
// Japan. We change the source.
replace best_source = 3 if country == "Japan" & year >= 1945 & year <= 1952
// San Marino: collective body, Nohlen only gives one member.
// NOTE(review): this forces Wikipedia (priority 2) for every San Marino row,
// so it only changes the outcome where V-Dem rows exist or where Wikipedia
// has no row (in which case the cell is dropped) -- confirm this is intended.
replace best_source = 2 if country == "San Marino"

// Keep, for every cell, only the rows coming from the selected source.
keep if source_priority == best_source
drop source_priority best_source

// flag_unreliable is not created in this file; it is presumably provided by
// the appended input datasets -- TODO confirm.

// Store "not flagged" as missing rather than 0. This does not interact with
// the statements below, which only test for / assign the value 1.
replace flag_unreliable = . if flag_unreliable == 0

// When the source is Nohlen, we can only allocate dates when there is a
// single leader: rows listing at least two leaders are flagged as unreliable.
replace flag_unreliable = 1 if source == "Nohlen" & leader_1 != "" & leader_2 != ""

// Remaining Nohlen rows: the leader is dated to Jan 1 of the year.
replace leader_start_1 = date("1jan" + string(year), "DMY") ///
	if source == "Nohlen" & flag_unreliable != 1

tempfile leaders
save `leaders'

////////////////////////////////////////////////////////////////////////////////
//////// c. Harmonizing names
////////////////////////////////////////////////////////////////////////////////

// Importing names to be changed.
// The spreadsheet is expected to have one row per correction with the
// columns country, old_name and new_name (read from its first row).
import excel using "$project_path/data/1_input/other/leaders/name_harmonization.xlsx", firstrow clear
local total_names_updated = _N

// Copy every correction into local macros so that they survive the switch
// back to the leaders dataset. Values are read with explicit subscripting
// (varname[k]), which is the documented way to pick the value of one
// observation inside an expression.
forvalues k = 1/`total_names_updated' {
	local country`k'  = country[`k']
	local old_name`k' = old_name[`k']
	local new_name`k' = new_name[`k']
}

// Harmonizing names: apply each correction to each of the leader_1 ...
// leader_10 slots, restricted to the country the correction refers to.
// Compound double quotes keep the command valid even when a name itself
// contains a double-quote character.
use `leaders', clear
forvalues k = 1/`total_names_updated' {
	forvalues j = 1/10 {
		qui replace leader_`j' = `"`new_name`k''"' if country==`"`country`k''"' & leader_`j'==`"`old_name`k''"'
	}
}

*** Saving output

// Display formats for the string variables.
format country %20s
forvalues j = 1/10 {
	format leader_`j' %20s
}
format leader_type %10s

// Shrink storage types, put the rows in a fixed order, and capitalise the
// two key variables before saving.
compress
sort country leader_type year
rename (country year) (Country Year)
save "$project_path/data/2_intermediary/leaders/leader_years.dta", replace

